#!/usr/bin/env python3
"""
Reconstruct results.json from existing step files in evaluation directories
"""

import json
import os
import re
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any

def _index_step_files(directory: Path, suffix: str) -> Dict[int, Path]:
    """Map step numbers to the ``step_<n>_<suffix>`` files found in *directory*.

    The step number may be written with any amount of zero padding
    (``step_1_...``, ``step_001_...``, ``step_0001_...``). When several
    spellings resolve to the same step, the canonical 3-digit name wins;
    otherwise the lexicographically first file is kept so the result is
    deterministic.

    Args:
        directory: Directory to scan. A missing directory yields an empty map.
        suffix: File name tail after ``step_<n>_`` (e.g. ``"image.jpg"``).

    Returns:
        A dict of ``step number -> path`` for every matching file.
    """
    found: Dict[int, Path] = {}
    if not directory.is_dir():
        return found

    # fullmatch rejects names that merely *start* like a step file.
    name_pattern = re.compile(rf"step_(\d+)_{re.escape(suffix)}")
    for path in sorted(directory.glob(f"step_*_{suffix}")):
        name_match = name_pattern.fullmatch(path.name)
        if not name_match:
            continue
        step = int(name_match.group(1))
        canonical_name = f"step_{step:03d}_{suffix}"
        if step not in found or path.name == canonical_name:
            found[step] = path
    return found


def reconstruct_results_from_directory(eval_dir: Path) -> Dict[str, Any]:
    """Reconstruct a results dictionary from the step files of one evaluation run.

    The directory name is parsed as
    ``qwen_eval_<difficulty>_seed<index>_<YYYYMMDD_HHMMSS>``; if it does not
    match, the difficulty is ``"unknown"``, the seed index is ``0`` and the
    timestamp is the current local time. The ``images``, ``audio`` and
    ``responses`` subdirectories are scanned for ``step_<n>_image.jpg``,
    ``step_<n>_audio.json`` and ``step_<n>_response.txt`` files, and one step
    entry is produced per distinct step number, in ascending order.

    Values that cannot be recovered from the files alone (rewards, scores,
    objectives, termination flags) are filled with fixed placeholder values.
    The only information read from file contents is the response length and
    the first ``COMMAND: ...`` line of each response, if present.

    Args:
        eval_dir: Path of the evaluation output directory.

    Returns:
        A dict with the keys ``config``, ``steps``, ``final_stats``,
        ``media_files`` and ``media_summary``. Media paths are stored relative
        to *eval_dir* (e.g. ``"images/step_001_image.jpg"``).
    """
    # Parse directory name for config info
    dir_match = re.match(r'qwen_eval_(\w+)_seed(\d+)_(\d{8}_\d{6})', eval_dir.name)
    if dir_match:
        difficulty = dir_match.group(1)
        seed_index = int(dir_match.group(2))
        timestamp = dir_match.group(3)
    else:
        difficulty = "unknown"
        seed_index = 0
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Initialize results structure
    results: Dict[str, Any] = {
        "config": {
            "difficulty": difficulty,
            "seed_index": seed_index,
            "max_rounds": None,
            "model": "qwen2.5-omni-7b",
            "timestamp": timestamp,
            "output_directory": str(eval_dir),
            "reconstructed": True
        },
        "steps": [],
        "final_stats": {},
        "media_files": {
            "images": [],
            "audio": [],
            "responses": []
        },
        "media_summary": {}
    }

    # (key in step["media_paths"], subdirectory == key in "media_files", file suffix)
    media_kinds = (
        ("image", "images", "image.jpg"),
        ("audio", "audio", "audio.json"),
        ("response", "responses", "response.txt"),
    )

    # Index the files that actually exist so that the per-step lookup below
    # uses the real file names instead of re-deriving a fixed-width name.
    indexed: Dict[str, Dict[int, Path]] = {
        path_key: _index_step_files(eval_dir / subdir, suffix)
        for path_key, subdir, suffix in media_kinds
    }

    step_numbers = set()
    for files_by_step in indexed.values():
        step_numbers.update(files_by_step)

    # Process each step
    total_reward = 0.0
    for step_num in sorted(step_numbers):
        step_data: Dict[str, Any] = {
            "step": step_num,
            "command": [0, 0, 50, 50],  # Default command
            "command_desc": "Unknown command (reconstructed)",
            "reward": 0.0,
            "total_reward": total_reward,
            "score_normalized": 0.0,
            "rounds_remaining": 0,
            "objectives_completed": 0,
            "model_response_length": 0,
            "terminated": False,
            "truncated": False,
            "media_paths": {"image": None, "audio": None, "response": None}
        }

        # Record whichever media files exist for this step
        for path_key, subdir, _suffix in media_kinds:
            media_file = indexed[path_key].get(step_num)
            if media_file is None:
                continue
            relative_path = f"{subdir}/{media_file.name}"
            step_data["media_paths"][path_key] = relative_path
            results["media_files"][subdir].append({
                "step": step_num,
                "path": relative_path
            })

        # Try to extract the command from the response text (best effort)
        resp_file = indexed["response"].get(step_num)
        if resp_file is not None:
            try:
                response_text = resp_file.read_text(encoding='utf-8')
            except (OSError, UnicodeError) as e:
                print(f"Warning: Could not read response file {resp_file}: {e}")
            else:
                step_data["model_response_length"] = len(response_text)

                # Look for COMMAND pattern
                command_match = re.search(r"COMMAND:\s*([^\n]+)", response_text, re.IGNORECASE)
                if command_match:
                    step_data["command_desc"] = f"Command: {command_match.group(1).strip()}"

        results["steps"].append(step_data)
        total_reward += step_data["reward"]

    # Create final stats
    max_step = max(step_numbers) if step_numbers else 0
    results["final_stats"] = {
        "total_steps": max_step,
        "total_reward": total_reward,
        "final_score_normalized": 0.0,  # Unknown without log data
        "objectives_completed": 0,      # Unknown without log data
        "total_objectives": 0,          # Unknown without log data
        "success_rate": 0.0,
        "terminated": False,            # Unknown without log data
        "truncated": True,              # Likely since no results.json was created
        "reconstructed": True
    }

    # Media summary
    results["media_summary"] = {
        "total_images": len(results["media_files"]["images"]),
        "total_audio_files": len(results["media_files"]["audio"]),
        "total_response_files": len(results["media_files"]["responses"]),
        "output_directory": str(eval_dir),
        "reconstructed": True
    }

    return results

def main():
    """Write a reconstructed results.json into each unfinished evaluation run.

    Scans the current working directory for ``qwen_eval_*`` directories that
    contain at least one of the ``images``, ``audio`` or ``responses``
    subdirectories but no ``results.json``. Each such directory is
    reconstructed and its results are saved as ``results.json`` inside it.
    A failure in one directory is reported and does not stop the others.
    """
    media_subdirs = ("images", "audio", "responses")

    def needs_reconstruction(candidate: Path) -> bool:
        # Only evaluation run directories are of interest.
        if not (candidate.is_dir() and candidate.name.startswith("qwen_eval_")):
            return False
        # There must be some step data to rebuild from ...
        if not any((candidate / sub).exists() for sub in media_subdirs):
            return False
        # ... and no results file already present.
        return not (candidate / "results.json").exists()

    pending = [entry for entry in Path(".").iterdir() if needs_reconstruction(entry)]

    if not pending:
        print("No evaluation directories found that need reconstruction.")
        return

    print(f"Found {len(pending)} evaluation directories to reconstruct:")
    for run_dir in pending:
        print(f"  - {run_dir.name}")

    print("\nReconstructing results...")

    for run_dir in pending:
        try:
            print(f"\nProcessing {run_dir.name}...")
            rebuilt = reconstruct_results_from_directory(run_dir)

            # Persist the reconstructed data next to the step files
            target = run_dir / "results.json"
            with open(target, 'w', encoding='utf-8') as handle:
                json.dump(rebuilt, handle, indent=2, ensure_ascii=False)

            print(f"  ✓ Created {target}")
            print(f"  ✓ Steps: {len(rebuilt['steps'])}")
            summary = rebuilt['media_summary']
            print(f"  ✓ Images: {summary['total_images']}")
            print(f"  ✓ Audio: {summary['total_audio_files']}")
            print(f"  ✓ Responses: {summary['total_response_files']}")

        except Exception as e:
            # Per-directory boundary: report and move on to the next run.
            print(f"  ✗ Error processing {run_dir.name}: {e}")

    print("\nReconstruction complete!")

# Run the reconstruction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main() 